
*** Table H.18: Demographic comparison

/*******************************************************************************
	Import Files 
*******************************************************************************/	
	
	* Survey: share of respondents living in each geographic zone.
	* Result: tempfile survey_region with variables region and survey_pop
	* (a proportion between 0 and 1).

	use "$datadir/survey/cleaned_output_20220314", clear

	cd "$outputdir_tab"

	* The data must hold one row per ipaddress for the row counts below to be
	* head counts.
	isid ipaddress

	* Number of respondents per reported state (q19_1). contract counts rows
	* directly, so ipaddress no longer has to be encoded into a numeric
	* variable just to be counted (encode is capped by the size of a value
	* label).
	contract q19_1, freq(survey_num)
	ren q19_1 state

	egen total = total(survey_num)
	gen share = survey_num/total

	keep state share

	* Assign each state label used in the survey to a zone. The lists are
	* mutually exclusive, so the order of the replace statements is irrelevant.
	gen region = ""

	replace region = "Northern Zone" if inlist(state, ///
		"Chandigarh (UT)", "Delhi", "Haryana", "Himachal Pradesh", ///
		"Jammu and Kashmir", "Ladakh (UT)", "Punjab", "Rajasthan")

	replace region = "Southern Zone" if inlist(state, ///
		"Andhra Pradesh", "Karnataka", "Kerala", "Puducherry (UT)", ///
		"Tamil Nadu")

	replace region = "Central Zone" if inlist(state, ///
		"Chhattisgarh", "Madhya Pradesh", "Uttar Pradesh", "Uttarakhand")

	replace region = "Eastern Zone" if inlist(state, ///
		"Bihar", "Jharkhand", "Orissa", "West Bengal")

	replace region = "Western Zone" if inlist(state, ///
		"Goa", "Gujarat", "Maharashtra")

	replace region = "North Eastern Zone" if inlist(state, ///
		"Arunachal Pradesh", "Manipur", "Assam")

	* Stop if the survey contains a state label that is not listed above.
	assert region != ""

	* Zone share = sum of the shares of its states.
	collapse (sum) survey_pop = share, by(region)

	tempfile survey_region
	save `survey_region', replace
	
	* Survey: share of respondents in each age bracket (q18).
	* Result: tempfile survey_age with variables age (the bracket label) and
	* survey_pop (a proportion between 0 and 1).

	use "$datadir/survey/cleaned_output_20220314", clear

	cd "$outputdir_tab"

	* The data must hold one row per ipaddress for the row counts below to be
	* head counts.
	isid ipaddress

	* Respondents per age bracket. contract counts rows directly, so ipaddress
	* no longer has to be encoded into a numeric variable just to be counted
	* (encode is capped by the size of a value label).
	contract q18, freq(survey_num)
	ren q18 age

	egen total_pop = total(survey_num)
	gen survey_pop = survey_num/total_pop

	* The string key stays the first variable of the saved file.
	keep age survey_pop

	tempfile survey_age
	save `survey_age'
	
	* Indian Census 2011, sheet C-13: population by single year of age.
	* Result: tempfile census_all with one row per area and age (18 and
	* above), holding state, age, pop and the adult total of the area
	* (state_pop).
	import excel "$datadir/survey/DDW-0000C-13.xls", sheet("C-13") cellrange(A8:N3715) clear

	* Only columns D to F are used: area name, age and population.
	keep D-F
	ren (D E F) (state age pop)

	* Drop the summary and the not-stated rows, then make age numeric. The
	* open-ended top category "100+" is stored as 100.
	drop if inlist(age, "All ages", "Age not stated")
	replace age = "100" if age == "100+"
	destring age, replace

	* Adults only. A missing numeric value compares greater than any number
	* in Stata, so missing ages are excluded explicitly instead of slipping
	* through the >= 18 test.
	keep if age >= 18 & !missing(age)

	* Adult population of each area. Stored as double because egen creates a
	* float by default, which cannot represent totals above 2^24 exactly.
	bys state: egen double state_pop = total(pop)

	tempfile census_all
	save `census_all', replace
	
	use `census_all', clear

	* Census: share of the adult population living in each geographic zone.
	* Result: tempfile census_region with variables region and census_pop
	* (a proportion between 0 and 1).

	* Keep the state and union-territory rows; the national row is used for
	* the age comparison instead.
	drop if state == "India"

	* Normalise the area name: strip the "State - " prefix and the numeric
	* code in parentheses, convert to proper case, and spell out "&".
	replace state = regexr(state, "State - ", "")
	replace state = regexr(state, "\([0-9]+\)", "")
	replace state = trim(proper(state))
	replace state = regexr(state, "&" , "and")

	* One row per area with its adult population.
	keep state state_pop
	duplicates drop

	* Assign each area to a zone. The lists are mutually exclusive, so the
	* order of the replace statements is irrelevant.
	gen region = ""

	replace region = "Northern Zone" if inlist(state, ///
		"Chandigarh", "Nct Of Delhi", "Haryana", "Himachal Pradesh", ///
		"Jammu and Kashmir", "Ladakh", "Punjab", "Rajasthan")

	replace region = "Southern Zone" if inlist(state, ///
		"Andaman and Nicobar Islands", "Andhra Pradesh", "Karnataka", ///
		"Kerala", "Puducherry", "Tamil Nadu", "Lakshadweep")

	replace region = "Central Zone" if inlist(state, ///
		"Chhattisgarh", "Madhya Pradesh", "Uttar Pradesh", "Uttarakhand")

	replace region = "Eastern Zone" if inlist(state, ///
		"Bihar", "Jharkhand", "Odisha", "West Bengal")

	replace region = "Western Zone" if inlist(state, ///
		"Goa", "Gujarat", "Maharashtra", "Daman and Diu", ///
		"Dadra and Nagar Haveli")

	replace region = "North Eastern Zone" if inlist(state, ///
		"Arunachal Pradesh", "Manipur", "Assam", "Meghalaya", "Mizoram", ///
		"Nagaland", "Tripura", "Sikkim")

	* Stop if an area is not covered by the lists above. Without this check
	* its population would be pooled into a blank zone and still enter the
	* denominator of the shares.
	assert region != ""

	collapse (sum) region_pop = state_pop, by(region)

	* double: egen creates a float by default, which cannot represent a
	* population total of this size exactly.
	egen double total_pop = total(region_pop)
	gen census_pop = region_pop/total_pop

	keep region census_pop

	tempfile census_region
	save `census_region'
	
	use `census_all', clear

	* Census: share of the adult population in each survey age bracket, taken
	* from the national row.
	* Result: tempfile census_age with variables age (the bracket label) and
	* census_pop (a proportion between 0 and 1).
	keep if state == "India"

	* Bracket labels are the merge key against the survey age file. Lower
	* bounds are inclusive and upper bounds exclusive.
	* NOTE(review): age >= 70 is also true for a missing age in Stata, so any
	* row with a missing age that reaches this point is counted in the oldest
	* bracket.
	gen age_group = ""
	replace age_group = "Between 18 and 30 years old" if age >= 18 & age < 30
	replace age_group = "Between 30 and 50 years old" if age >= 30 & age < 50
	replace age_group = "Between 50 and 70 years old" if age >= 50 & age < 70
	replace age_group = "More than 70 years old"      if age >= 70

	assert age_group != ""

	collapse (sum) age_pop = pop, by(age_group)

	* double: egen creates a float by default, which cannot represent a
	* national population total exactly.
	egen double tot_pop = total(age_pop)
	gen census_pop = age_pop/tot_pop

	keep age_group census_pop
	ren age_group age

	tempfile census_age
	save `census_age', replace
	
	
/*******************************************************************************
	Outputs
*******************************************************************************/	

	cap file close output

	* Write Table_H_18.tex into the current working directory: Panel A
	* compares age brackets and Panel B compares zones, each row showing the
	* Census 2011 share and then the survey share, in percent.
	*
	* Each figure is looked up by its age or region key after the merge, so
	* the rows no longer depend on the sort order of a transposed dataset.
	* The bytes written to the file are the same as before.
	*
	* NOTE(review): the column specification declares more columns than the
	* three that every row uses. It is left untouched so the generated table
	* does not change.

	file open output using Table_H_18.tex, write text replace

	file write output "\begin{table}[htbp]\centering" _n ///
		"\def\sym#1{\ifmmode^{#1}\else\(^{#1}\)\fi}" _n ///
		"\caption{Population Distribution Comparison}" _n ///
		"\begin{tabular}{l*{2}{ccc}}" _n ///
		"\toprule" _n ///
		"   & Census 2011&      Survey\\" _n ///
		"\midrule" _n ///
		"\multicolumn{2}{l}{\textbf{Panel A: Age Comparison}} \\" _n ///
		"\midrule" _n

	* ---- Panel A: age brackets ------------------------------------------
	* assert(3) stops the run unless both sources have the same brackets.
	use `survey_age', clear
	merge 1:1 age using `census_age', assert(3) nogen

	* Proportions to percentages.
	replace survey_pop = survey_pop*100
	replace census_pop = census_pop*100

	foreach grp in "Between 18 and 30 years old" ///
	               "Between 30 and 50 years old" ///
	               "Between 50 and 70 years old" ///
	               "More than 70 years old" {

		* The printed label equals the data key, except for the oldest
		* bracket, which the table words differently.
		local rowlab "`grp'"
		if "`grp'" == "More than 70 years old" {
			local rowlab "70 years old or above"
		}

		* Exactly one observation matches, so r(mean) is that value.
		sum census_pop if age == "`grp'"
		file write output "`rowlab'&"  %10.1fc  (r(mean)) " &"
		sum survey_pop if age == "`grp'"
		file write output  %10.1fc  (r(mean)) _n " \\" _n
	}

	file write output "\midrule" _n ///
		"\multicolumn{2}{l}{\textbf{Panel B: By Region}}\\" _n ///
		"\midrule" _n

	* ---- Panel B: zones -------------------------------------------------
	* assert(3) stops the run unless both sources have the same zones.
	use `survey_region', clear
	merge 1:1 region using `census_region', assert(3) nogen

	* Proportions to percentages.
	replace survey_pop = survey_pop*100
	replace census_pop = census_pop*100

	foreach zone in "Central Zone" "Eastern Zone" "North Eastern Zone" ///
	                "Northern Zone" "Southern Zone" "Western Zone" {

		* Exactly one observation matches, so r(mean) is that value.
		sum census_pop if region == "`zone'"
		file write output "`zone'&"  %10.1fc  (r(mean)) " &"
		sum survey_pop if region == "`zone'"
		file write output  %10.1fc  (r(mean)) " \\" _n
	}

	file write output "\bottomrule" _n "\end{tabular}" _n "\end{table}"

	file close output

	* This section previously ended with the semicolon delimiter still
	* active. That state is restored here so that any code following this
	* section is parsed exactly as before.
	#delimit ;
